********************************************************************************
*** Inquiry Data

*first grab all of the inquiry data and append it all together;
*then reshape and then save separately

*inquiries
use "data/inputs/Clarity/Clarity_Sample_Inquiries_Return_to_Harvard_20200724.dta", clear
gen clar_samp = "Full Clarity"
append using "data/inputs/Clarity/Harvard_Sample_Inquiries_Return_From_Clarity_20200724.dta"
replace clar_samp = "Experian Match" if mi(clar_samp)
append using "data/inputs/Clarity/Members_Inquiries_Return_to_Harvard_20200724.dta"
replace clar_samp = "Hugo Match" if mi(clar_samp)

bysort harvard_identity_id clar_samp record_nb inquiry_tradeline_type: gen numinquiry = _N

replace inquiry_received_date = subinstr(inquiry_received_date, "-", "", .)
destring inquiry_received_date, replace

collapse (median) net_monthly_income months_at_address age ///
	(firstnm) housing_status pay_frequency zip_code state (mean) numinquiry ///
	(min) firstinq = inquiry_received_date (max) lastinq = inquiry_received_date, ///
	by(harvard_identity_id clar_samp record_nb inquiry_tradeline_type)

*keep median income, months at address, age and modal location
bysort harvard_identity_id clar_samp record_nb: gen nval = _n
foreach var of varlist net_monthly_income months_at_address age {
	bysort harvard_identity_id clar_samp record_nb: egen k`var' = median(`var')
	drop `var'
	rename k`var' `var'
}
foreach var of varlist zip_code state {
	bysort harvard_identity_id clar_samp record_nb: egen k`var' = mode(`var'), minmode
	drop `var'
	rename k`var' `var'
}

*tag first an last inquiries
bysort harvard_identity_id clar_samp record_nb: egen kfirstinq = min(firstinq)
drop firstinq
rename kfirstinq firstinq
bysort harvard_identity_id clar_samp record_nb: egen klastinq = max(lastinq)
drop lastinq
rename klastinq lastinq

drop nval housing_status pay_frequency

reshape wide numinquiry, i(harvard_identity_id clar_samp record_nb state) j(inquiry_tradeline_type) string

egen numinquiry_all = rowtotal(numinquiry*)
gen clar_totinq = numinquiry_all

isid harvard_identity_id clar_samp record_nb, missok

tempfile inq
save `inq'


********************************************************************************
*** Tradeline Data

*tradelines
use "data/inputs/Clarity/Clarity_Sample_Tradelines_Return_to_Harvard_20200724.dta", clear
gen clar_samp = "Full Clarity"
append using "data/inputs/Clarity/Harvard_Sample_Tradelines_Return_From_Clarity_20200724.dta"
replace clar_samp = "Experian Match" if mi(clar_samp)
append using "data/inputs/Clarity/Members_Tradelines_Return_to_Harvard_20200724.dta"
replace clar_samp = "Hugo Match" if mi(clar_samp)

*closed accounts
gen closedacc = 0
replace closedacc = 1 if !mi(closed_date)
gen openacc = 1 - closedacc
gen delinqacc = 0
replace delinqacc = 1 if !mi(delinquency_date)

*account date
replace account_opened = subinstr(account_opened, "-", "", .)
destring account_opened, replace

collapse (sum) closedacc openacc delinqacc ///
	(min) firstacc = account_opened (max) lastacc = account_opened ///
	(sum) balances = current_balance allcredits = highest_credit ///
	(firstnm) zip_code, ///
	by(harvard_identity_id clar_samp record_nb)
	
gen totacc = closedacc + openacc
gen clar_opentrades = openacc
gen clar_deltrades = delinqacc
gen clar_util = balances/allcredits
gen clar_limits = allcredits
gen clar_balance = balances

********************************************************************************
*** Merge and Save

merge 1:1 record_nb clar_samp harvard_identity_id using `inq', gen(_m_tradinq)

gen source = "experian match sample"
replace source = "non experian sample" if mi(record_nb)
replace source = "members sample" if record_nb < 10000 & !mi(record_nb)

preserve
	keep if source == "non experian sample"
	save "data/int/clarity/nonexp clarity.dta", replace
restore

preserve
	keep if source == "members sample"
	save "data/int/clarity/members clarity.dta", replace
restore

preserve
	keep if source == "experian match sample"
	save "data/int/clarity/exp clarity.dta", replace
restore

********************************************************************************
*** Merge with hugo with Experian

use "data/int/clarity/members clarity", clear
append using "data/int/clarity/exp clarity"

tempfile clar
save `clar'

use "data/int/hugo and experian.dta", clear

*merge what merges
merge 1:1 record_nb using `clar', gen(_m_clarity)
*append that which does not merge
append using "data/int/clarity/nonexp clarity"

*make sure no double nobs within source
bysort harvard_identity_id source: gen nobs = _N
assert nobs == 1 if !mi(source)
drop nobs

*** fill in missing experian inquiries and tradelines as zero
foreach var of varlist clar_opentrades clar_deltrades clar_totinq clar_limits clar_balance totinq {
	replace `var' = 0 if mi(`var') & (ussamp==1 | appliedhugo==1)
}

replace isconstrained = 1 if mi(isconstrained) & (ussamp==1 | appliedhugo==1)

save "data/cleaned/hugo and expclar.dta", replace

keep if !mi(cohort)
count
assert `r(N)' == 1537

save "data/cleaned/collapsed action with credit.dta", replace

